import scrapy
from bs4 import BeautifulSoup
from ..items import DoubanItem

class DoubanSpider(scrapy.Spider):
    """Scrape title, publication info and rating from the Douban book Top 250 page.

    Only the URL listed in ``start_urls`` is fetched; ``parse`` does not
    schedule any follow-up requests.
    """

    name = 'douban'
    # Must be a bare host name: Scrapy's offsite filtering compares against
    # the request host, so an entry containing a path never matches.
    allowed_domains = ['book.douban.com']
    start_urls = ['https://book.douban.com/top250']

    def parse(self, response):
        """Yield one ``DoubanItem`` per book found in *response*.

        Titles come from the ``title`` attribute of the first ``<a>`` that
        follows each ``div.pl2``, publication info from the text of each
        ``p.pl`` and ratings from the text of each ``span.rating_nums``.
        The three sequences are paired positionally.

        Args:
            response: The downloaded page; its ``text`` is parsed as HTML.

        Yields:
            DoubanItem: with the ``title``, ``publish`` and ``rate`` fields
            set; a missing title is stored as an empty string.
        """
        soup = BeautifulSoup(response.text, 'html.parser')

        titles = []
        for div in soup.find_all('div', class_='pl2'):
            link = div.find_next('a')
            # Guard against a div with no following anchor as well as an
            # anchor that lacks a title attribute.
            title = link.get('title') if link is not None else None
            titles.append(title or '')

        publish_list = [tag.text for tag in soup.select('p.pl')]
        rates = [tag.text for tag in soup.select('span.rating_nums')]

        # zip() stops at the shortest sequence, so a book missing one of the
        # tags would silently shift or drop fields; make that visible.
        if not len(titles) == len(publish_list) == len(rates):
            self.logger.warning(
                'Field count mismatch on %s: %d titles, %d publish entries, '
                '%d ratings; items may be misaligned or dropped',
                response.url, len(titles), len(publish_list), len(rates),
            )

        for title, publish, rate in zip(titles, publish_list, rates):
            yield DoubanItem(title=title, publish=publish, rate=rate)
